{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": "                             id  action              corpid  \\\n0      6558e5cf396bd603314d97a1    send  ww19785ecf93c94bb5   \n1      6558e5f2396bd603314d97b1    send  ww19785ecf93c94bb5   \n2      6558e601396bd603314d97bc    send  ww19785ecf93c94bb5   \n3      6558edf40e5f7766c37b64f0    send  ww8ba2db9baa1db2b7   \n4      6558ee080e5f7766c37b64f6    send  ww8ba2db9baa1db2b7   \n...                         ...     ...                 ...   \n14180  655a1dc80e5f7766c385e750    send  ww8ba2db9baa1db2b7   \n14181  655a1dd70e5f7766c385e77a    send  ww8ba2db9baa1db2b7   \n14182  655a1de10e5f7766c385e791  recall  ww8ba2db9baa1db2b7   \n14183  655a1df00e5f7766c385e7a8    send  ww8ba2db9baa1db2b7   \n14184  655a21470e5f7766c385eed0    send  ww8ba2db9baa1db2b7   \n\n                                   from  \\\n0      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n1      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n2      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n3      wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A   \n4      wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A   \n...                                 ...   \n14180  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14181  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14182  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14183  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14184                           1030116   \n\n                                             msgid        msgtime msgtype  \\\n0      16148119414446279788_1700324811357_external  1700324807115    text   \n1       7916821743428793578_1700324846292_external  1700324842683    text   \n2       3035325536151975488_1700324864304_external  1700324860268    text   \n3      14218631375976575876_1700326897212_external  1700326893076    text   \n4       9305790332453681482_1700326918647_external  1700326914156    text   \n...                                            ...            ...     ...   
\n14180   2582321746986539159_1700404676528_external  1700404672336    text   \n14181    661026385661028883_1700404692281_external  1700404688136    text   \n14182   4110560313210601338_1700404700545_external  1700404696227  revoke   \n14183  13406161714446366411_1700404715981_external  1700404711721    text   \n14184   9462963768104092837_1700405571209_external  1700405567394    text   \n\n                                 roomid  \\\n0      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n1      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n2      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n3      wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ   \n4      wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ   \n...                                 ...   \n14180  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14181  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14182  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14183  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14184  wrrK4lBgAAChc8CKkTPxghiMLvhSzjPQ   \n\n                                                  tolist  \\\n0      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n1      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n2      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n3      1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...   \n4      1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...   \n...                                                  ...   \n14180  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14181  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14182  LiuAiJun,1025794,worK4lBgAATWXyibofqSVFAD5c2_w...   \n14183  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14184  10322874,1032337,1032310,XinYu,1021838,1021234...   \n\n                                                 content            date_time  \\\n0      1、装货地：东省佛山市嘉实多护车保养站里官路店@#_#@_2、目的地：广东省东莞市大岭山信立...  2023-11-19 00:26:47   \n1      1、装货地：福建省厦门市敦煌实业@#_#@_2、目的地：福建省福州市冠闽产业园@#_#@_@...  2023-11-19 00:27:22   \n2      1、装货地：福建省福州市冠闽产业园@#_#@_2、目的地：福建省厦门市敦煌实业@#_#@_@...  
2023-11-19 00:27:40   \n3                                      36825401541870352  2023-11-19 01:01:33   \n4                                            这单我没接单，怎么取消  2023-11-19 01:01:54   \n...                                                  ...                  ...   \n14180             @在线时间(9:00-21:30) 今晚让司机把车停在停车场保管吧，给停车费  2023-11-19 22:37:52   \n14181        @在线时间(9:00-21:30) 今天的等候费，压车费，明天送回给货主的费用是多少？  2023-11-19 22:38:08   \n14182  {\"tolist\":[\"LiuAiJun\",\"1025794\",\"worK4lBgAATWX...  2023-11-19 22:38:16   \n14183    @在线时间(9:00-21:30) 今天的等候费，压车费，停车费，明天送回给货主的费用是多少？  2023-11-19 22:38:31   \n14184                     您打开订单，右上角有个投诉，点击投诉，让客诉人员介入给您处理  2023-11-19 22:52:47   \n\n            day  \n0      20231119  \n1      20231119  \n2      20231119  \n3      20231119  \n4      20231119  \n...         ...  \n14180  20231119  \n14181  20231119  \n14182  20231119  \n14183  20231119  \n14184  20231119  \n\n[14185 rows x 12 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>id</th>\n      <th>action</th>\n      <th>corpid</th>\n      <th>from</th>\n      <th>msgid</th>\n      <th>msgtime</th>\n      <th>msgtype</th>\n      <th>roomid</th>\n      <th>tolist</th>\n      <th>content</th>\n      <th>date_time</th>\n      <th>day</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6558e5cf396bd603314d97a1</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>16148119414446279788_1700324811357_external</td>\n      <td>1700324807115</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：东省佛山市嘉实多护车保养站里官路店@#_#@_2、目的地：广东省东莞市大岭山信立...</td>\n      <td>2023-11-19 00:26:47</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6558e5f2396bd603314d97b1</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>7916821743428793578_1700324846292_external</td>\n      <td>1700324842683</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：福建省厦门市敦煌实业@#_#@_2、目的地：福建省福州市冠闽产业园@#_#@_@...</td>\n      <td>2023-11-19 00:27:22</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>6558e601396bd603314d97bc</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>3035325536151975488_1700324864304_external</td>\n      
<td>1700324860268</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：福建省福州市冠闽产业园@#_#@_2、目的地：福建省厦门市敦煌实业@#_#@_@...</td>\n      <td>2023-11-19 00:27:40</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>6558edf40e5f7766c37b64f0</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A</td>\n      <td>14218631375976575876_1700326897212_external</td>\n      <td>1700326893076</td>\n      <td>text</td>\n      <td>wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ</td>\n      <td>1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...</td>\n      <td>36825401541870352</td>\n      <td>2023-11-19 01:01:33</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>6558ee080e5f7766c37b64f6</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A</td>\n      <td>9305790332453681482_1700326918647_external</td>\n      <td>1700326914156</td>\n      <td>text</td>\n      <td>wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ</td>\n      <td>1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...</td>\n      <td>这单我没接单，怎么取消</td>\n      <td>2023-11-19 01:01:54</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>14180</th>\n      <td>655a1dc80e5f7766c385e750</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>2582321746986539159_1700404676528_external</td>\n      <td>1700404672336</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      
<td>@在线时间(9:00-21:30) 今晚让司机把车停在停车场保管吧，给停车费</td>\n      <td>2023-11-19 22:37:52</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14181</th>\n      <td>655a1dd70e5f7766c385e77a</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>661026385661028883_1700404692281_external</td>\n      <td>1700404688136</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      <td>@在线时间(9:00-21:30) 今天的等候费，压车费，明天送回给货主的费用是多少？</td>\n      <td>2023-11-19 22:38:08</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14182</th>\n      <td>655a1de10e5f7766c385e791</td>\n      <td>recall</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>4110560313210601338_1700404700545_external</td>\n      <td>1700404696227</td>\n      <td>revoke</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,worK4lBgAATWXyibofqSVFAD5c2_w...</td>\n      <td>{\"tolist\":[\"LiuAiJun\",\"1025794\",\"worK4lBgAATWX...</td>\n      <td>2023-11-19 22:38:16</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14183</th>\n      <td>655a1df00e5f7766c385e7a8</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>13406161714446366411_1700404715981_external</td>\n      <td>1700404711721</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      <td>@在线时间(9:00-21:30) 今天的等候费，压车费，停车费，明天送回给货主的费用是多少？</td>\n      <td>2023-11-19 22:38:31</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14184</th>\n      <td>655a21470e5f7766c385eed0</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>1030116</td>\n      <td>9462963768104092837_1700405571209_external</td>\n      <td>1700405567394</td>\n      
<td>text</td>\n      <td>wrrK4lBgAAChc8CKkTPxghiMLvhSzjPQ</td>\n      <td>10322874,1032337,1032310,XinYu,1021838,1021234...</td>\n      <td>您打开订单，右上角有个投诉，点击投诉，让客诉人员介入给您处理</td>\n      <td>2023-11-19 22:52:47</td>\n      <td>20231119</td>\n    </tr>\n  </tbody>\n</table>\n<p>14185 rows × 12 columns</p>\n</div>"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "# Load the CSV export; its fields are delimited by the 0x01 control character.\n",
    "file = r'C:\\Users\\fengfeng.qiu\\Downloads\\20231120.csv'\n",
    "df = pd.read_csv(filepath_or_buffer=file, sep='\\001')\n",
    "# Leave the frame as the cell's last expression so the notebook renders it.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "str"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace missing values with empty strings. Rebinding (rather than inplace=True) leaves the\n",
    "# originally loaded object untouched, so re-running cells cannot observe a half-mutated frame.\n",
    "df = df.fillna(\"\")\n",
    "# Positional lookup of the first row's roomid, independent of the index labels.\n",
    "type(df[\"roomid\"].iloc[0])"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "                             id  action              corpid  \\\n0      6558e5cf396bd603314d97a1    send  ww19785ecf93c94bb5   \n1      6558e5f2396bd603314d97b1    send  ww19785ecf93c94bb5   \n2      6558e601396bd603314d97bc    send  ww19785ecf93c94bb5   \n3      6558edf40e5f7766c37b64f0    send  ww8ba2db9baa1db2b7   \n4      6558ee080e5f7766c37b64f6    send  ww8ba2db9baa1db2b7   \n...                         ...     ...                 ...   \n14180  655a1dc80e5f7766c385e750    send  ww8ba2db9baa1db2b7   \n14181  655a1dd70e5f7766c385e77a    send  ww8ba2db9baa1db2b7   \n14182  655a1de10e5f7766c385e791  recall  ww8ba2db9baa1db2b7   \n14183  655a1df00e5f7766c385e7a8    send  ww8ba2db9baa1db2b7   \n14184  655a21470e5f7766c385eed0    send  ww8ba2db9baa1db2b7   \n\n                                   from  \\\n0      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n1      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n2      wmX07pZAAALofZ2oVhW0GI97YTy_R5MA   \n3      wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A   \n4      wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A   \n...                                 ...   \n14180  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14181  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14182  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14183  wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA   \n14184                           1030116   \n\n                                             msgid        msgtime msgtype  \\\n0      16148119414446279788_1700324811357_external  1700324807115    text   \n1       7916821743428793578_1700324846292_external  1700324842683    text   \n2       3035325536151975488_1700324864304_external  1700324860268    text   \n3      14218631375976575876_1700326897212_external  1700326893076    text   \n4       9305790332453681482_1700326918647_external  1700326914156    text   \n...                                            ...            ...     ...   
\n14180   2582321746986539159_1700404676528_external  1700404672336    text   \n14181    661026385661028883_1700404692281_external  1700404688136    text   \n14182   4110560313210601338_1700404700545_external  1700404696227  revoke   \n14183  13406161714446366411_1700404715981_external  1700404711721    text   \n14184   9462963768104092837_1700405571209_external  1700405567394    text   \n\n                                 roomid  \\\n0      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n1      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n2      wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n3      wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ   \n4      wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ   \n...                                 ...   \n14180  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14181  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14182  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14183  wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA   \n14184  wrrK4lBgAAChc8CKkTPxghiMLvhSzjPQ   \n\n                                                  tolist  \\\n0      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n1      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n2      wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...   \n3      1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...   \n4      1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...   \n...                                                  ...   \n14180  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14181  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14182  LiuAiJun,1025794,worK4lBgAATWXyibofqSVFAD5c2_w...   \n14183  LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...   \n14184  10322874,1032337,1032310,XinYu,1021838,1021234...   \n\n                                                 content            date_time  \\\n0      1、装货地：东省佛山市嘉实多护车保养站里官路店@#_#@_2、目的地：广东省东莞市大岭山信立...  2023-11-19 00:26:47   \n1      1、装货地：福建省厦门市敦煌实业@#_#@_2、目的地：福建省福州市冠闽产业园@#_#@_@...  2023-11-19 00:27:22   \n2      1、装货地：福建省福州市冠闽产业园@#_#@_2、目的地：福建省厦门市敦煌实业@#_#@_@...  
2023-11-19 00:27:40   \n3                                      36825401541870352  2023-11-19 01:01:33   \n4                                            这单我没接单，怎么取消  2023-11-19 01:01:54   \n...                                                  ...                  ...   \n14180             @在线时间(9:00-21:30) 今晚让司机把车停在停车场保管吧，给停车费  2023-11-19 22:37:52   \n14181        @在线时间(9:00-21:30) 今天的等候费，压车费，明天送回给货主的费用是多少？  2023-11-19 22:38:08   \n14182  {\"tolist\":[\"LiuAiJun\",\"1025794\",\"worK4lBgAATWX...  2023-11-19 22:38:16   \n14183    @在线时间(9:00-21:30) 今天的等候费，压车费，停车费，明天送回给货主的费用是多少？  2023-11-19 22:38:31   \n14184                     您打开订单，右上角有个投诉，点击投诉，让客诉人员介入给您处理  2023-11-19 22:52:47   \n\n            day  \n0      20231119  \n1      20231119  \n2      20231119  \n3      20231119  \n4      20231119  \n...         ...  \n14180  20231119  \n14181  20231119  \n14182  20231119  \n14183  20231119  \n14184  20231119  \n\n[13848 rows x 12 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>id</th>\n      <th>action</th>\n      <th>corpid</th>\n      <th>from</th>\n      <th>msgid</th>\n      <th>msgtime</th>\n      <th>msgtype</th>\n      <th>roomid</th>\n      <th>tolist</th>\n      <th>content</th>\n      <th>date_time</th>\n      <th>day</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6558e5cf396bd603314d97a1</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>16148119414446279788_1700324811357_external</td>\n      <td>1700324807115</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：东省佛山市嘉实多护车保养站里官路店@#_#@_2、目的地：广东省东莞市大岭山信立...</td>\n      <td>2023-11-19 00:26:47</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6558e5f2396bd603314d97b1</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>7916821743428793578_1700324846292_external</td>\n      <td>1700324842683</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：福建省厦门市敦煌实业@#_#@_2、目的地：福建省福州市冠闽产业园@#_#@_@...</td>\n      <td>2023-11-19 00:27:22</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>6558e601396bd603314d97bc</td>\n      <td>send</td>\n      <td>ww19785ecf93c94bb5</td>\n      <td>wmX07pZAAALofZ2oVhW0GI97YTy_R5MA</td>\n      <td>3035325536151975488_1700324864304_external</td>\n      
<td>1700324860268</td>\n      <td>text</td>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>wmX07pZAAASQHjLUaV2Koz87YmEsMvAw,wmX07pZAAA88m...</td>\n      <td>1、装货地：福建省福州市冠闽产业园@#_#@_2、目的地：福建省厦门市敦煌实业@#_#@_@...</td>\n      <td>2023-11-19 00:27:40</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>6558edf40e5f7766c37b64f0</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A</td>\n      <td>14218631375976575876_1700326897212_external</td>\n      <td>1700326893076</td>\n      <td>text</td>\n      <td>wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ</td>\n      <td>1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...</td>\n      <td>36825401541870352</td>\n      <td>2023-11-19 01:01:33</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>6558ee080e5f7766c37b64f6</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAsG9MWT3DmAoEFEwHodag-A</td>\n      <td>9305790332453681482_1700326918647_external</td>\n      <td>1700326914156</td>\n      <td>text</td>\n      <td>wrrK4lBgAAJe_Wbh0P9kF4E6leZ0i1tQ</td>\n      <td>1022462,lzyx,WuXuJiaZhuang,164,1022910,XuXiaoC...</td>\n      <td>这单我没接单，怎么取消</td>\n      <td>2023-11-19 01:01:54</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>14180</th>\n      <td>655a1dc80e5f7766c385e750</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>2582321746986539159_1700404676528_external</td>\n      <td>1700404672336</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      
<td>@在线时间(9:00-21:30) 今晚让司机把车停在停车场保管吧，给停车费</td>\n      <td>2023-11-19 22:37:52</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14181</th>\n      <td>655a1dd70e5f7766c385e77a</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>661026385661028883_1700404692281_external</td>\n      <td>1700404688136</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      <td>@在线时间(9:00-21:30) 今天的等候费，压车费，明天送回给货主的费用是多少？</td>\n      <td>2023-11-19 22:38:08</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14182</th>\n      <td>655a1de10e5f7766c385e791</td>\n      <td>recall</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>4110560313210601338_1700404700545_external</td>\n      <td>1700404696227</td>\n      <td>revoke</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,worK4lBgAATWXyibofqSVFAD5c2_w...</td>\n      <td>{\"tolist\":[\"LiuAiJun\",\"1025794\",\"worK4lBgAATWX...</td>\n      <td>2023-11-19 22:38:16</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14183</th>\n      <td>655a1df00e5f7766c385e7a8</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>wmrK4lBgAAeaLSqOpkBv3UiS_--2QXlA</td>\n      <td>13406161714446366411_1700404715981_external</td>\n      <td>1700404711721</td>\n      <td>text</td>\n      <td>wrrK4lBgAAoNRJ0lNplHROlY0LbsfEoA</td>\n      <td>LiuAiJun,1025794,1021084,1023076,lzyx,1024091,...</td>\n      <td>@在线时间(9:00-21:30) 今天的等候费，压车费，停车费，明天送回给货主的费用是多少？</td>\n      <td>2023-11-19 22:38:31</td>\n      <td>20231119</td>\n    </tr>\n    <tr>\n      <th>14184</th>\n      <td>655a21470e5f7766c385eed0</td>\n      <td>send</td>\n      <td>ww8ba2db9baa1db2b7</td>\n      <td>1030116</td>\n      <td>9462963768104092837_1700405571209_external</td>\n      <td>1700405567394</td>\n      
<td>text</td>\n      <td>wrrK4lBgAAChc8CKkTPxghiMLvhSzjPQ</td>\n      <td>10322874,1032337,1032310,XinYu,1021838,1021234...</td>\n      <td>您打开订单，右上角有个投诉，点击投诉，让客诉人员介入给您处理</td>\n      <td>2023-11-19 22:52:47</td>\n      <td>20231119</td>\n    </tr>\n  </tbody>\n</table>\n<p>13848 rows × 12 columns</p>\n</div>"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Rows carrying a roomid go to df_room; rows whose roomid is empty go to df_single.\n",
    "has_room = df[\"roomid\"] != \"\"\n",
    "df_room = df[has_room]\n",
    "df_single = df[~has_room]\n",
    "\n",
    "df_room"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "def has_extractor_info(sentence, timeout=10):\n",
    "    \"\"\"Check whether the extractor service finds shipment information in a sentence.\n",
    "\n",
    "    Args:\n",
    "        sentence: Text submitted to the cold-transportation-extractor endpoint.\n",
    "        timeout: Seconds allowed for the HTTP request (passed to requests.post).\n",
    "\n",
    "    Returns:\n",
    "        True if the response is OK and its 'slots' entry is non-empty (shipment\n",
    "        information was found); False otherwise.\n",
    "    \"\"\"\n",
    "    # (id, name) of each slot type listed in the request.\n",
    "    slot_specs = [\n",
    "        (8, \"车长\"),\n",
    "        (9, \"车型\"),\n",
    "        (10, \"目的地\"),\n",
    "        (11, \"装货地\"),\n",
    "        (12, \"货名\"),\n",
    "        (13, \"重量\"),\n",
    "        (14, \"体积\"),\n",
    "        (15, \"装货时间\"),\n",
    "        (16, \"卸货时间\"),\n",
    "        (17, \"运费预算\"),\n",
    "        (18, \"备注\"),\n",
    "    ]\n",
    "    data = {\n",
    "        \"confidence\": 0.9,\n",
    "        \"extractInfoList\": [\n",
    "            {\"confidence\": 0.9, \"id\": slot_id, \"name\": slot_name}\n",
    "            for slot_id, slot_name in slot_specs\n",
    "        ],\n",
    "        \"requestId\": \"9c2f5fc4-014d-4463-bfc3-91e41414ce46\",\n",
    "        \"sentence\": sentence\n",
    "    }\n",
    "\n",
    "    # Without a timeout a stalled connection would block the kernel indefinitely.\n",
    "    result = requests.post(url='https://predict-phantom.amh-group.com/cold-transportation-extractor/v1/models/cold-transportation-extractor:predict', json=data, timeout=timeout)\n",
    "    if not result.ok:\n",
    "        return False\n",
    "    content = json.loads(result.text)\n",
    "    slots = content['slots'] if 'slots' in content else []\n",
    "    # Echo the extracted slots so they show up in the cell output.\n",
    "    print(slots)\n",
    "    return bool(slots)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2023-11-19 08:30:26 角色A:@李大（09:00-18:30） \n",
      "[{'name': '卸货时间', 'id': 16, 'value': '09:00-18:30', 'confidence': 0.9387680131753094}]\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "test_str = '''2023-11-19 08:30:26 角色A:@李大（09:00-18:30） \n",
    "2023-11-19 08:30:32 角色A:image\n",
    "2023-11-19 08:30:38 角色A:image\n",
    "2023-11-19 08:31:37 角色A:这一单司机以经装货，卸货了，然后我支付不了运费\n",
    "2023-11-19 08:33:14 角色B:您下单后，司机接单后要及时签署协议\n",
    "2023-11-19 08:33:51 角色B:这个司机是没有承运资质的，是不能在平台拉您这票货的\n",
    "2023-11-19 08:33:58 角色B:所以您只能取消订单线下支付\n",
    "2023-11-19 08:35:08 角色A:关键是人家订了货车以放过去的路上了，然后我取消人家司机要我补放空费的啦\n",
    "2023-11-19 08:36:10 角色A:在说人家的货以经送到，也按要求把货以卸过了\n",
    "2023-11-19 08:36:11 角色B:不能哦\n",
    "2023-11-19 08:36:27 角色B:司机是被平台拉黑的，本身就不能接平台这类的订单\n",
    "2023-11-19 08:36:38 角色B:image\n",
    "2023-11-19 08:36:41 角色A:image\n",
    "2023-11-19 08:36:47 角色B:周五也是这类的情况，只能换司机接单哦\n",
    "2023-11-19 08:39:10 角色A:关键是我走的运满满专票，人家司机也按我的要求\n",
    "2023-11-19 08:41:02 角色B:这是一条引用/回复消息：@#_#@_\"老吴18688465779：@#_#@_关键是我走的运满满专票，人家司机也按我的要求\"@#_#@_------@#_#@_@老吴18688465779 他不能接单，也就走不了平台哦\n",
    "2023-11-19 08:41:25 角色B:平台上签署完协议才算是接单成功\n",
    "2023-11-19 08:45:12 角色A:voice\n",
    "2023-11-19 08:47:44 角色B:因为没有生效的订单\n",
    "2023-11-19 08:49:15 角色B:平台的发货流程是：您发布货源信息，司机查看抢单，您需要和司机确定好立刻签署协议，之后按平台流程该确认装货就确认，该确认卸货就确认\n",
    "2023-11-19 08:50:33 角色B:平台拉黑司机接单，但不会同时限制他们搜货，他不能接单是司机自身的原因，平台只会对有恶劣行为的司机进行账号的处理，同样也是为了保证后面的货主的发货安全\n",
    "2023-11-19 08:51:24 角色B:流程是这个流程，如果这个司机是您周五就咨询原因的那个，平台已经立刻告知您，这个司机不能接您的订单需要更换司机'''\n",
    "\n",
    "# Probe the extractor with just the first line of the sample dialogue.\n",
    "first_line = test_str.split(\"\\n\")[0]\n",
    "print(first_line)\n",
    "print(has_extractor_info(first_line))"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "roomid\nwrX07pZAAA-3GPJZSGLfZRfG9GiDmkDA    2023-11-19 09:27:52 角色A:@顾镕持续匹配\\n2023-11-19 09...\nwrX07pZAAA-Bq3PRqVk1ZNVAHTWtCHaw    2023-11-19 07:33:54 角色A:@调度-阿薇@\\n2023-11-19 07...\nwrX07pZAAA-Mae6dGC0CcMnCBqPbSu0A    2023-11-19 09:47:04 角色A:1、装货地：河间市永兴食品加工厂@#_#@_...\nwrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA    2023-11-19 00:26:47 角色A:1、装货地：东省佛山市嘉实多护车保养站里官路...\nwrX07pZAAA0cw8L9tKdgmXTJm-kcttXw    2023-11-19 10:13:56 角色A:5吨货@#_#@_安丘赵戈咕咕鸡食品@#_#...\n                                                          ...                        \nwrrK4lBgAAzgUhCImKbkG8Lmj3WxKW4g    2023-11-19 12:24:39 角色A:开票提醒[太阳]@#_#@_2023年11月...\nwrrK4lBgAAzjGk1t5BsgJz64Skiz14ag    2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...\nwrrK4lBgAAzkwe9hP7Dh7HzNcUsVNlzQ    2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...\nwrrK4lBgAAzpC3AKE1Uns6N1zoNyoB6w    2023-11-19 12:24:35 角色A:开票提醒[太阳]@#_#@_2023年11月...\nwrrK4lBgAAzuifwESUzZOubOpFIh540g    2023-11-19 12:24:44 角色A:开票提醒[太阳]@#_#@_2023年11月...\nLength: 1587, dtype: object"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import datetime\n",
    "def make_dialogue(groups:pd.DataFrame):\n",
    "    \"\"\"Render a group of message rows as a time-ordered dialogue transcript.\n",
    "\n",
    "    Args:\n",
    "        groups: Rows with 'msgtime', 'from', 'date_time', 'msgtype' and 'content'\n",
    "            columns. The frame is left unmodified.\n",
    "\n",
    "    Returns:\n",
    "        A string with one line per row, ordered by 'msgtime', each formatted as\n",
    "        '<date_time> 角色<label>:<message>'. Labels follow the order in which senders\n",
    "        first appear (A, B, ..., Z, AA, AB, ...). Rows whose msgtype is not 'text'\n",
    "        show the msgtype in place of the content.\n",
    "    \"\"\"\n",
    "    def _role_label(index):\n",
    "        # Spreadsheet-style label: 0 -> A, 25 -> Z, 26 -> AA, so more than 26 senders stay readable.\n",
    "        label = \"\"\n",
    "        index += 1\n",
    "        while index > 0:\n",
    "            index, remainder = divmod(index - 1, 26)\n",
    "            label = chr(65 + remainder) + label\n",
    "        return label\n",
    "\n",
    "    # Sort into a new frame: sorting in place would mutate the caller's data.\n",
    "    # mergesort is stable, so rows sharing a msgtime keep their incoming order.\n",
    "    ordered = groups.sort_values(by='msgtime', kind='mergesort')\n",
    "    role_dict = {role: _role_label(index) for index, role in enumerate(ordered['from'].unique())}\n",
    "    dialogue = []\n",
    "    for send_time, send_role, msg_type, content in zip(\n",
    "        ordered['date_time'], ordered['from'], ordered['msgtype'], ordered['content']\n",
    "    ):\n",
    "        send_msg = content if msg_type == 'text' else msg_type\n",
    "        dialogue.append(f\"{send_time} 角色{role_dict[send_role]}:{send_msg}\")\n",
    "    return \"\\n\".join(dialogue)\n",
    "# Build one transcript per chat room; the result is a Series indexed by roomid.\n",
    "room_data = df_room.groupby(by=\"roomid\").apply(make_dialogue)\n",
    "room_data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "                                roomid  \\\n0     wrX07pZAAA-3GPJZSGLfZRfG9GiDmkDA   \n1     wrX07pZAAA-Bq3PRqVk1ZNVAHTWtCHaw   \n2     wrX07pZAAA-Mae6dGC0CcMnCBqPbSu0A   \n3     wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n4     wrX07pZAAA0cw8L9tKdgmXTJm-kcttXw   \n...                                ...   \n1582  wrrK4lBgAAzgUhCImKbkG8Lmj3WxKW4g   \n1583  wrrK4lBgAAzjGk1t5BsgJz64Skiz14ag   \n1584  wrrK4lBgAAzkwe9hP7Dh7HzNcUsVNlzQ   \n1585  wrrK4lBgAAzpC3AKE1Uns6N1zoNyoB6w   \n1586  wrrK4lBgAAzuifwESUzZOubOpFIh540g   \n\n                                                      0  \n0     2023-11-19 09:27:52 角色A:@顾镕持续匹配\\n2023-11-19 09...  \n1     2023-11-19 07:33:54 角色A:@调度-阿薇@\\n2023-11-19 07...  \n2     2023-11-19 09:47:04 角色A:1、装货地：河间市永兴食品加工厂@#_#@_...  \n3     2023-11-19 00:26:47 角色A:1、装货地：东省佛山市嘉实多护车保养站里官路...  \n4     2023-11-19 10:13:56 角色A:5吨货@#_#@_安丘赵戈咕咕鸡食品@#_#...  \n...                                                 ...  \n1582  2023-11-19 12:24:39 角色A:开票提醒[太阳]@#_#@_2023年11月...  \n1583  2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...  \n1584  2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...  \n1585  2023-11-19 12:24:35 角色A:开票提醒[太阳]@#_#@_2023年11月...  \n1586  2023-11-19 12:24:44 角色A:开票提醒[太阳]@#_#@_2023年11月...  \n\n[1587 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>roomid</th>\n      <th>0</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>wrX07pZAAA-3GPJZSGLfZRfG9GiDmkDA</td>\n      <td>2023-11-19 09:27:52 角色A:@顾镕持续匹配\\n2023-11-19 09...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>wrX07pZAAA-Bq3PRqVk1ZNVAHTWtCHaw</td>\n      <td>2023-11-19 07:33:54 角色A:@调度-阿薇@\\n2023-11-19 07...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>wrX07pZAAA-Mae6dGC0CcMnCBqPbSu0A</td>\n      <td>2023-11-19 09:47:04 角色A:1、装货地：河间市永兴食品加工厂@#_#@_...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>2023-11-19 00:26:47 角色A:1、装货地：东省佛山市嘉实多护车保养站里官路...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>wrX07pZAAA0cw8L9tKdgmXTJm-kcttXw</td>\n      <td>2023-11-19 10:13:56 角色A:5吨货@#_#@_安丘赵戈咕咕鸡食品@#_#...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>1582</th>\n      <td>wrrK4lBgAAzgUhCImKbkG8Lmj3WxKW4g</td>\n      <td>2023-11-19 12:24:39 角色A:开票提醒[太阳]@#_#@_2023年11月...</td>\n    </tr>\n    <tr>\n      <th>1583</th>\n      <td>wrrK4lBgAAzjGk1t5BsgJz64Skiz14ag</td>\n      <td>2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...</td>\n    </tr>\n    <tr>\n      <th>1584</th>\n      <td>wrrK4lBgAAzkwe9hP7Dh7HzNcUsVNlzQ</td>\n      <td>2023-11-19 12:24:40 角色A:开票提醒[太阳]@#_#@_2023年11月...</td>\n    </tr>\n    <tr>\n      <th>1585</th>\n      <td>wrrK4lBgAAzpC3AKE1Uns6N1zoNyoB6w</td>\n      <td>2023-11-19 12:24:35 角色A:开票提醒[太阳]@#_#@_2023年11月...</td>\n    </tr>\n    <tr>\n      <th>1586</th>\n      
<td>wrrK4lBgAAzuifwESUzZOubOpFIh540g</td>\n      <td>2023-11-19 12:24:44 角色A:开票提醒[太阳]@#_#@_2023年11月...</td>\n    </tr>\n  </tbody>\n</table>\n<p>1587 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the grouped Series into a frame and move its index into a regular column.\n",
    "room_data_frame = room_data.to_frame().reset_index()\n",
    "room_data_frame"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "data": {
      "text/plain": "                              room_id  \\\n0    wrX07pZAAA-Bq3PRqVk1ZNVAHTWtCHaw   \n1    wrX07pZAAA-Mae6dGC0CcMnCBqPbSu0A   \n2    wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA   \n3    wrX07pZAAA0cw8L9tKdgmXTJm-kcttXw   \n4    wrX07pZAAA1WN4Lk4C0tgD_SpwN7rdXg   \n..                                ...   \n424  wrrK4lBgAAz-18M7xy-8He8qhYPSglUg   \n425  wrrK4lBgAAzJgDuVlbNL1ou1kEXrCOsQ   \n426  wrrK4lBgAAzRsxAJiV3wSKEtzMpM09gw   \n427  wrrK4lBgAAzSBJu_Gjs1zJvB7lJsoz5A   \n428  wrrK4lBgAAzYOxGsqpEyGKgRczXJCwzw   \n\n                                              dialogue  \n0    2023-11-19 07:33:54 角色A:@调度-阿薇@\\n2023-11-19 07...  \n1    2023-11-19 09:47:04 角色A:1、装货地：河间市永兴食品加工厂@#_#@_...  \n2    2023-11-19 00:26:47 角色A:1、装货地：东省佛山市嘉实多护车保养站里官路...  \n3    2023-11-19 10:13:56 角色A:5吨货@#_#@_安丘赵戈咕咕鸡食品@#_#...  \n4    2023-11-19 10:30:37 角色A:发货地址：西华双汇肉鸡屠宰 河南省周口市西华...  \n..                                                 ...  \n424  2023-11-19 13:24:50 角色A:订单号:\\t3678902424101272...  \n425  2023-11-19 08:12:23 角色A:@永不言败 师傅下午装的货 明天卸可以吗 \\...  \n426  2023-11-19 07:48:42 角色A:我问问押车半天需要补多少钱\\n2023-11...  \n427  2023-11-19 10:46:56 角色A:@秋梨膏 （9:00-18:30） \\n20...  \n428  2023-11-19 16:36:02 角色A:image\\n2023-11-19 16:3...  \n\n[429 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>room_id</th>\n      <th>dialogue</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>wrX07pZAAA-Bq3PRqVk1ZNVAHTWtCHaw</td>\n      <td>2023-11-19 07:33:54 角色A:@调度-阿薇@\\n2023-11-19 07...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>wrX07pZAAA-Mae6dGC0CcMnCBqPbSu0A</td>\n      <td>2023-11-19 09:47:04 角色A:1、装货地：河间市永兴食品加工厂@#_#@_...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>wrX07pZAAA-zB_YQmmP5jEmSH6-zQqkA</td>\n      <td>2023-11-19 00:26:47 角色A:1、装货地：东省佛山市嘉实多护车保养站里官路...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>wrX07pZAAA0cw8L9tKdgmXTJm-kcttXw</td>\n      <td>2023-11-19 10:13:56 角色A:5吨货@#_#@_安丘赵戈咕咕鸡食品@#_#...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>wrX07pZAAA1WN4Lk4C0tgD_SpwN7rdXg</td>\n      <td>2023-11-19 10:30:37 角色A:发货地址：西华双汇肉鸡屠宰 河南省周口市西华...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>424</th>\n      <td>wrrK4lBgAAz-18M7xy-8He8qhYPSglUg</td>\n      <td>2023-11-19 13:24:50 角色A:订单号:\\t3678902424101272...</td>\n    </tr>\n    <tr>\n      <th>425</th>\n      <td>wrrK4lBgAAzJgDuVlbNL1ou1kEXrCOsQ</td>\n      <td>2023-11-19 08:12:23 角色A:@永不言败 师傅下午装的货 明天卸可以吗 \\...</td>\n    </tr>\n    <tr>\n      <th>426</th>\n      <td>wrrK4lBgAAzRsxAJiV3wSKEtzMpM09gw</td>\n      <td>2023-11-19 07:48:42 角色A:我问问押车半天需要补多少钱\\n2023-11...</td>\n    </tr>\n    <tr>\n      <th>427</th>\n      <td>wrrK4lBgAAzSBJu_Gjs1zJvB7lJsoz5A</td>\n      <td>2023-11-19 10:46:56 角色A:@秋梨膏 （9:00-18:30） \\n20...</td>\n    </tr>\n    <tr>\n      <th>428</th>\n      
<td>wrrK4lBgAAzYOxGsqpEyGKgRczXJCwzw</td>\n      <td>2023-11-19 16:36:02 角色A:image\\n2023-11-19 16:3...</td>\n    </tr>\n  </tbody>\n</table>\n<p>429 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def check_has_cargo_info(item):\n",
    "    \"\"\"Return 1 if a dialogue of acceptable length has a line accepted by `has_extractor_info`.\n",
    "\n",
    "    Args:\n",
    "        item: Dialogue text; it is split into lines on newline characters.\n",
    "\n",
    "    Returns:\n",
    "        0 when the dialogue has 4 or fewer lines or 80 or more lines, or when\n",
    "        no line passes `has_extractor_info`; 1 otherwise.\n",
    "    \"\"\"\n",
    "    lines = item.split(\"\\n\")\n",
    "    # Length filter: dialogues at or beyond either bound are rejected outright.\n",
    "    if len(lines) <= 4 or len(lines) >= 80:\n",
    "        return 0\n",
    "    # `any` short-circuits on the first matching line, like the original break.\n",
    "    return int(any(has_extractor_info(line) for line in lines))\n",
    "\n",
    "MAX_ROOM_DIALOGUES = 800  # cap on the number of dialogues collected below\n",
    "\n",
    "dialogues = []\n",
    "rooms_ids = []\n",
    "# Column 0 holds the dialogue text; \"roomid\" holds the room identifier.\n",
    "for room_id, dialogue in zip(room_data_frame[\"roomid\"], room_data_frame[0]):\n",
    "    # `>=` stops at exactly the cap; a `>` test would let one extra row through.\n",
    "    if len(dialogues) >= MAX_ROOM_DIALOGUES:\n",
    "        break\n",
    "    if check_has_cargo_info(dialogue):\n",
    "        dialogues.append(dialogue)\n",
    "        rooms_ids.append(room_id)\n",
    "\n",
    "save_room_data = pd.DataFrame({\"room_id\": rooms_ids, \"dialogue\": dialogues})\n",
    "save_room_data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\fengfeng.qiu\\AppData\\Local\\Temp\\ipykernel_14228\\2285521213.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_single[\"roomid\"] = df_single.apply(make_fake_room_id,axis=1)\n"
     ]
    },
    {
     "data": {
      "text/plain": "roomid\n10136_A1038857                                 2023-11-10 17:38:50 角色A:丁超接受宁波的价格1000＋6.6\\n202...\n650238567924db5d8e3268d184dbc477_FaCaiBaoFu    2023-11-06 09:37:33 角色A:菏泽好多米食品是你客户么[捂脸]\\n2023...\n650238567924db5d8e3268d184dbc477_GuRong        2023-11-10 13:19:56 角色A:1、线路：装货地：山东省菏泽市牡丹区贵源粉业...\n650238567924db5d8e3268d184dbc477_LiYanYing     2023-11-06 08:29:57 角色A:抓紧一下时间 \\n2023-11-06 08...\n650238567924db5d8e3268d184dbc477_WuJiaHao      2023-11-06 09:37:54 角色A:朝阳成交了是吧\\n2023-11-06 09...\n                                                                     ...                        \nwmX07pZAAAxWstU_4yCZe0RV8PB-3DQg_zyz           2023-11-08 16:35:16 角色A:我已经添加了你，现在我们可以开始聊天了。\\n...\nwmX07pZAAAxZrsyfjCiwh1xWsK_yLr_g_zyz           2023-11-09 08:00:43 角色A:我已经添加了你，现在我们可以开始聊天了。\\n...\nwmX07pZAAAxznxIyO07jbYtZ98WnYkyg_zyz           2023-11-08 20:41:05 角色A:我已经添加了你，现在我们可以开始聊天了。\\n...\nwmX07pZAAAyZrl64X-Qms4ZZhB9giSvw_zyz           2023-11-11 19:01:26 角色A:我已经添加了你，现在我们可以开始聊天了。\\n...\nwmX07pZAAAz566YqqLxvqDO1LGMrN9jA_zyz           2023-11-10 11:59:02 角色A:我已经添加了你，现在我们可以开始聊天了。\\n...\nLength: 1376, dtype: object"
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def make_fake_room_id(item):\n",
    "    \"\"\"Build a room id from the `from` and `tolist` fields of a row.\n",
    "\n",
    "    Both values are converted to strings, sorted, and joined with an\n",
    "    underscore, so swapping the two fields yields the same id.\n",
    "    \"\"\"\n",
    "    participants = sorted((str(item[\"from\"]), str(item[\"tolist\"])))\n",
    "    return \"_\".join(participants)\n",
    "\n",
    "# Build the column on a fresh frame: assigning it directly onto `df_single`\n",
    "# raised SettingWithCopyWarning.\n",
    "df_single = df_single.assign(roomid=df_single.apply(make_fake_room_id, axis=1))\n",
    "\n",
    "# One entry per room id, produced by `make_dialogue` on each group.\n",
    "single_room_data = df_single.groupby(\"roomid\").apply(make_dialogue)\n",
    "single_room_data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "outputs": [
    {
     "data": {
      "text/plain": "                                         room_id  \\\n0                                 10136_A1038857   \n1    650238567924db5d8e3268d184dbc477_FaCaiBaoFu   \n2        650238567924db5d8e3268d184dbc477_GuRong   \n3     650238567924db5d8e3268d184dbc477_LiYanYing   \n4      650238567924db5d8e3268d184dbc477_WuJiaHao   \n..                                           ...   \n96      GuanXin_wmX07pZAAAdjgmmB4wd11eZO_PxBQRdA   \n97      GuanXin_wmX07pZAAAoYVKjdn60dIPCEaTqXwqNA   \n98      GuanXin_wmX07pZAAAoZuv-rWB7y1MLsA8q2vsSA   \n99      GuanXin_wmX07pZAAAqyTxnctx-fr6WRYHtlYcrA   \n100     GuanXin_wmX07pZAAAvnZjR9_D1bxJT-YoXkjetQ   \n\n                                              dialogue  \n0    2023-11-10 17:38:50 角色A:丁超接受宁波的价格1000＋6.6\\n202...  \n1    2023-11-06 09:37:33 角色A:菏泽好多米食品是你客户么[捂脸]\\n2023...  \n2    2023-11-10 13:19:56 角色A:1、线路：装货地：山东省菏泽市牡丹区贵源粉业...  \n3    2023-11-06 08:29:57 角色A:抓紧一下时间 \\n2023-11-06 08...  \n4    2023-11-06 09:37:54 角色A:朝阳成交了是吧\\n2023-11-06 09...  \n..                                                 ...  \n96   2023-11-08 15:43:39 角色A:我通过了你的联系人验证请求，现在我们可以开始...  \n97   2023-11-04 15:10:24 角色A:要收多少费用\\n2023-11-04 15:...  \n98   2023-11-08 15:52:34 角色A:我通过了你的联系人验证请求，现在我们可以开始...  \n99   2023-11-10 17:37:43 角色A:我通过了你的联系人验证请求，现在我们可以开始...  \n100  2023-11-08 16:00:35 角色A:我通过了你的联系人验证请求，现在我们可以开始...  \n\n[101 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>room_id</th>\n      <th>dialogue</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>10136_A1038857</td>\n      <td>2023-11-10 17:38:50 角色A:丁超接受宁波的价格1000＋6.6\\n202...</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>650238567924db5d8e3268d184dbc477_FaCaiBaoFu</td>\n      <td>2023-11-06 09:37:33 角色A:菏泽好多米食品是你客户么[捂脸]\\n2023...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>650238567924db5d8e3268d184dbc477_GuRong</td>\n      <td>2023-11-10 13:19:56 角色A:1、线路：装货地：山东省菏泽市牡丹区贵源粉业...</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>650238567924db5d8e3268d184dbc477_LiYanYing</td>\n      <td>2023-11-06 08:29:57 角色A:抓紧一下时间 \\n2023-11-06 08...</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>650238567924db5d8e3268d184dbc477_WuJiaHao</td>\n      <td>2023-11-06 09:37:54 角色A:朝阳成交了是吧\\n2023-11-06 09...</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>GuanXin_wmX07pZAAAdjgmmB4wd11eZO_PxBQRdA</td>\n      <td>2023-11-08 15:43:39 角色A:我通过了你的联系人验证请求，现在我们可以开始...</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>GuanXin_wmX07pZAAAoYVKjdn60dIPCEaTqXwqNA</td>\n      <td>2023-11-04 15:10:24 角色A:要收多少费用\\n2023-11-04 15:...</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>GuanXin_wmX07pZAAAoZuv-rWB7y1MLsA8q2vsSA</td>\n      <td>2023-11-08 15:52:34 角色A:我通过了你的联系人验证请求，现在我们可以开始...</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>GuanXin_wmX07pZAAAqyTxnctx-fr6WRYHtlYcrA</td>\n      <td>2023-11-10 17:37:43 角色A:我通过了你的联系人验证请求，现在我们可以开始...</td>\n    </tr>\n    <tr>\n  
    <th>100</th>\n      <td>GuanXin_wmX07pZAAAvnZjR9_D1bxJT-YoXkjetQ</td>\n      <td>2023-11-08 16:00:35 角色A:我通过了你的联系人验证请求，现在我们可以开始...</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MAX_SINGLE_DIALOGUES = 100  # cap on the number of dialogues collected below\n",
    "\n",
    "# Move the room id index into a regular column; the dialogue text lands in column 0.\n",
    "single_room_data_frame = single_room_data.to_frame().reset_index()\n",
    "\n",
    "dialogues = []\n",
    "rooms_ids = []\n",
    "for room_id, dialogue in zip(single_room_data_frame[\"roomid\"], single_room_data_frame[0]):\n",
    "    # `>=` stops at exactly the cap; the previous `>` test collected one row too many (101).\n",
    "    if len(dialogues) >= MAX_SINGLE_DIALOGUES:\n",
    "        break\n",
    "    if check_has_cargo_info(dialogue):\n",
    "        dialogues.append(dialogue)\n",
    "        rooms_ids.append(room_id)\n",
    "\n",
    "save_single_room_data = pd.DataFrame({\"room_id\": rooms_ids, \"dialogue\": dialogues})\n",
    "save_single_room_data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "# Export only the dialogue column without rebinding the source frames, so\n",
    "# `room_id` stays available to later cells.\n",
    "save_single_room_data[[\"dialogue\"]].to_json(\"冷运私聊对话记录.json\")\n",
    "save_room_data[[\"dialogue\"]].to_json(\"冷运群聊对话记录.json\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "outputs": [],
   "source": [
    "\n",
    "# Copy the dialogue text into a `chatlog` column and write one JSON record per row.\n",
    "# `.assign` builds a new frame instead of adding a column onto a slice.\n",
    "save_room_data_format = save_room_data[[\"dialogue\"]].assign(chatlog=save_room_data[\"dialogue\"])\n",
    "save_room_data_format.to_json(\"冷运群聊对话记录.json\",orient='records',force_ascii=False)\n",
    "\n",
    "# Same treatment for the second frame, kept under its own name so the source frame is not overwritten.\n",
    "save_single_room_data_format = save_single_room_data[[\"dialogue\"]].assign(\n",
    "    chatlog=save_single_room_data[\"dialogue\"]\n",
    ")\n",
    "save_single_room_data_format.to_json(\"冷运私聊对话记录.json\",orient='records',force_ascii=False)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "data": {
      "text/plain": "(429, 2)"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the (rows, columns) shape of `save_room_data`.\n",
    "save_room_data.shape"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "CHUNK_SIZE = 100  # rows written to each output file\n",
    "N_CHUNKS = 4      # files 1..4 cover rows 0-399; rows beyond that are not exported\n",
    "\n",
    "# `.assign` builds a new frame instead of adding a column onto a slice of `save_room_data`.\n",
    "save_room_data_format = save_room_data[[\"dialogue\"]].assign(chatlog=save_room_data[\"dialogue\"])\n",
    "\n",
    "# Write consecutive CHUNK_SIZE-row slices to numbered files.\n",
    "for part in range(N_CHUNKS):\n",
    "    start = part * CHUNK_SIZE\n",
    "    chunk = save_room_data_format.iloc[start:start + CHUNK_SIZE, :]\n",
    "    chunk.to_json(\"冷运群聊对话记录{}.json\".format(part + 1), orient='records', force_ascii=False)\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
